footballers <- feather::read_feather("/Users/shabdul/Desktop/Blog_Images/complete.feather")
nrow(footballers)
## [1] 17994
It contains 17,994 footballers from differnet nations and clubs
strikers <- footballers[(footballers$prefers_st ==TRUE |
footballers$prefers_cf ==TRUE),
]
nrow(strikers)
## [1] 3409
There are 3,409 strikers
Let’s analyze age of strikers
plot_ly(x = strikers$age, opacity = 0.6, type = "histogram") %>%
layout(barmode="overlay",
title = "Age distribution of strikers",
xaxis = list(title = "Age"),
yaxis = list(title = "Number of Strikers")
)
summary(strikers$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 16.00 21.00 25.00 24.86 28.00 38.00
We see the 3rd quartile for strikers age is 28
# Let's look at strikers above or equal to 28 years of age
strikers_28orAbove <- footballers[(footballers$prefers_st ==TRUE |
footballers$prefers_cf ==TRUE)
& footballers$age >= 28 ,
]
nrow(strikers_28orAbove)
## [1] 975
We see 975 strikers aged 28 or more
Let’s look at correlation between various skill atttributes of these strikers
# Let's try to look at the correlation between the skill atttributes of these strikers
correlation_dataframe <- as.data.frame(strikers_28orAbove[,sapply(strikers_28orAbove, function(x) is.numeric(x))])
drops <- c("ram","rcm","rdm","rcb","rwb","cam","cdm",
"lwb","lam","lcm","ldm","lcb",
"rw","rf","rs","rm","rb","st","lw","cf","cm","lm","lb","cb","ls","lf","gk",
"gk_diving","gk_handling","gk_kicking","gk_positioning","gk_reflexes",
"ID","eur_release_clause","potential",
"def","marking","interceptions",
"sliding_tackle","standing_tackle"
)
correlation_dataframe <- correlation_dataframe[ , !(names(correlation_dataframe) %in% drops)]
# Look at correlation plot
M <- cor(correlation_dataframe)
corrplot(M, method = "circle")
You notice some obvious trends here. Shoaib will call out a couple of them here…. Pace, Stamina, Composure, Penalties and Freekicks seem interesting
top_3_pace <- strikers_28orAbove %>%
filter(rank(desc(pac))<=3)
img <- c(image_read(top_3_pace$flag[1]), image_read(top_3_pace$flag[2]), image_read(top_3_pace$flag[3]))
image_append(image_scale(img, "x100"))
top_3_stamina<- strikers_28orAbove %>%
filter(rank(desc(stamina))<=3)
img <- c(image_read(top_3_stamina$flag[1]), image_read(top_3_stamina$flag[2]), image_read(top_3_stamina$flag[3]))
image_append(image_scale(img, "x100"))
plot_ly(strikers_28orAbove, x = ~age) %>%
add_markers(y = ~stamina, text = ~name, color = I("red"), opacity=0.5, name = "Stamina", showlegend = TRUE) %>%
add_markers(y = ~pac, text = ~name, color = I("blue"), opacity=0.5, name = "Pace", showlegend = TRUE) %>%
add_lines(y = ~fitted(loess(pac ~ age)),
line = list(color = '#1370D3',opacity=1),
name = "Pace Trend", showlegend = TRUE) %>%
add_lines(y = ~fitted(loess(stamina ~ age)),
line = list(color = '#FF3F33',opacity=1),
name = "Stamina Trend", showlegend = TRUE) %>%
layout(xaxis = list(title = 'Age'),
yaxis = list(title = 'Pace / Stamina'),
title = "Impact of Age on Pace and Stamina",
legend = list(x = 0.80, y = 0.90))
top_2_penalties <- strikers_28orAbove %>%
filter(rank(desc(penalties))<=3)
img <- c(image_read(top_2_penalties$flag[1]), image_read(top_2_penalties$flag[2]))
image_append(image_scale(img, "x100"))
top_3_free_kick_accuracy<- strikers_28orAbove %>%
filter(rank(desc(free_kick_accuracy))<=3)
img <- c(image_read(top_3_free_kick_accuracy$flag[1]), image_read(top_3_free_kick_accuracy$flag[2]), image_read(top_3_free_kick_accuracy$flag[3]))
image_append(image_scale(img, "x100"))
plot_ly(strikers_28orAbove, x = ~age) %>%
add_markers(y = ~penalties, text = ~name, color = I("red"), opacity=0.5, name = "Penalties", showlegend = TRUE) %>%
add_markers(y = ~free_kick_accuracy, text = ~name, color = I("blue"), opacity=0.5, name = "Free kick accuracy", showlegend = TRUE) %>%
add_lines(y = ~fitted(loess(penalties ~ age)),
line = list(color = '#1370D3',opacity=1),
name = "Penalties Trend", showlegend = TRUE) %>%
add_lines(y = ~fitted(loess(free_kick_accuracy ~ age)),
line = list(color = '#FF3F33',opacity=1),
name = "Free kick_accuracy Trend", showlegend = TRUE) %>%
layout(xaxis = list(title = 'Age'),
yaxis = list(title = 'Penalties / Free_kick_accuracy'),
title = "Impact of Age on Penalties and Free kick accuracy",
legend = list(x = 0.80, y = 0.90))
top_3_composure<- strikers_28orAbove %>%
filter(rank(desc(composure))<=3)
img <- c(image_read(top_3_composure$flag[1]), image_read(top_3_composure$flag[2]), image_read(top_3_composure$flag[3]))
image_append(image_scale(img, "x100"))
top_3_aggression<- strikers_28orAbove %>%
filter(rank(desc(aggression))<=3)
img <- c(image_read(top_3_aggression$flag[1]), image_read(top_3_aggression$flag[2]), image_read(top_3_aggression$flag[3]))
image_append(image_scale(img, "x100"))
plot_ly(strikers_28orAbove, x = ~age) %>%
add_markers(y = ~composure, text = ~name, color = I("red"), opacity=0.5, name = "Composure", showlegend = TRUE) %>%
add_markers(y = ~aggression, text = ~name, color = I("blue"), opacity=0.5, name = "Aggression", showlegend = TRUE) %>%
add_lines(y = ~fitted(loess(composure ~ age)),
line = list(color = '#1370D3',opacity=1),
name = "Composure Trend", showlegend = TRUE) %>%
add_lines(y = ~fitted(loess(aggression ~ age)),
line = list(color = '#FF3F33',opacity=1),
name = "Aggression Trend", showlegend = TRUE) %>%
layout(xaxis = list(title = 'Age'),
yaxis = list(title = 'Composure / Aggression'),
title = "Impact of Age on Composure and Aggression",
legend = list(x = 0.80, y = 0.90))
strikers_above25_spread <- strikers_28orAbove %>% group_by(nationality) %>% summarise(n = n()) %>% arrange(desc(n))
head(arrange(strikers_above25_spread, desc(n)), n=10)
## # A tibble: 10 x 2
## nationality n
## <chr> <int>
## 1 Brazil 79
## 2 England 75
## 3 Argentina 73
## 4 Germany 49
## 5 Italy 49
## 6 Spain 48
## 7 Chile 39
## 8 France 39
## 9 Colombia 34
## 10 Japan 34
df <- read.csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv',stringsAsFactors = FALSE)
# adding 13 manual entries which are missing from the csv
df <- rbind(df[,c('COUNTRY','CODE')],c("England","GBR"),
c("Bosnia Herzegovina","BIH"),
c("DR Congo","COD"),
c("Ivory Coast","CIV"),
c("Cape Verde","CPV"),
c("Republic of Ireland","IRL"),
c("FYR Macedonia","MKD"),
c("Korea Republic","KOR"),
c("Congo","COG"),
c("China PR","CHN"),
c("Guinea Bissa","GNB"),
c("Trinidad & Tobago","TTO"),
c("Gambia","GMB"),
c("Antigua & Barbuda","ATG")
)
location_mapping <- df %>% group_by(COUNTRY) %>% summarise(n = CODE)
location_mapping <- data.frame(location_mapping)
strikers_above25_spread <- data.frame(strikers_above25_spread)
strikers_above25_spread$code <- location_mapping$n[match(strikers_above25_spread$nationality,location_mapping$COUNTRY)]
# adding light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)
# specifying map projection/options
g <- list(
showframe = FALSE,
showcoastlines = FALSE,
projection = list(type = 'Mercator')
)
p <- plot_geo(strikers_above25_spread) %>%
add_trace(
z = ~n, color = ~n, colors = 'Blues',
text = ~nationality, locations = ~code, marker = list(line = l)
) %>%
colorbar(title = 'Population', tickprefix = '') %>%
layout(
title = 'Population Spread of Strikers above 25',
geo = g
)
p